The towed in Hartford

Looking

library(dplyr)
library(lubridate)
library(leaflet)
library(ggmap)
library(knitr)
library(stringr)
library(geosphere)
library(ggplot2)
require(ggmap)
require(scales)
library(sp)
#install.packages("devtools")
#devtools::install_github("hrecht/censusapi")
library("censusapi")
tows <- read.csv("data/tows.csv", stringsAsFactors=F)

Cleaning and prepping the data

## There are many blank fields in the Tow Firm and Address column
## There are no blank fields in the phone number column
## So let's figure out tow firm name and address based on phone number

### Subset dataframe of complete data
tows_sub <- subset(tows, Tow_Firm!="")
tows_sub <- subset(tows_sub, !duplicated(Tow_Firm))

# Getting rid of one of the tow firm names who used a phone number but two different names
tows_sub <- subset(tows_sub, Tow_Firm!="CROSS COUNTRY AUTO")


#### Geolocate tow firms
geo <- geocode(location = tows_sub$Tow_Firm_Address, output="latlon", source="google")
tows_sub <- cbind(tows_sub, geo)

#### An array of 21 colors just for fun
color <- data.frame("#29e908", 
                     "#0a5cee", 
                     "#8d480c", 
                     "#8edeb7", 
                     "#c9e746", 
                     "#96bab2", 
                     "#0fe5a2", 
                     "#6a5c5b", 
                     "#19cdfd", 
                     "#279fe6", 
                     "#7ac150", 
                     "#660e6e", 
                     "#095a21", 
                     "#dfe142", 
                     "#786839", 
                     "#f5657c", 
                     "#4decd2", 
                     "#4eb06f", 
                     "#fdc200", 
                     "#08d479", 
                     "#b2cca8")
color <- data.frame(t(color))
rownames(color) <- NULL

#### Adding the colors to the tow firm name dataframe
tows_sub <- cbind(tows_sub, color)

#### Delete Tow_Firm and Tow_Firm_Address columns in original dataframe
tows <- tows[,-3]
tows <- tows[,-3]

#### Prep dataframe for joining
tows_sub <- tows_sub[c("Tow_Firm", "Tow_Firm_Address", "Tow_Firm_Phone", "lon", "lat", "t.color.")]

#### Join the dataframes
tows <- left_join(tows, tows_sub)

#### Clean up time and dates
tows$Date <- ymd(tows$Date)
tows$Time <- hms(tows$Time)
tows$created_at <- ymd_hms(tows$created_at)
tows$created_at <- ymd_hms(tows$updated_at)
tows$removed_at <- ymd_hms(tows$removed_at)


#### Prepping Lat/Lon
tows$tow_lon <- gsub(",.*", "", tows$geom)
tows$tow_lat <- gsub(".*,", "", tows$geom)

Exploring the data

About 20682 vehicles have been towed in Hartford since 2015.

There are 21 towing companies in the city.

3455 vehicles had no license plate listed.

Where are the vehicles towed from?

leaflet(tows) %>% addTiles('http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png', attribution='Map tiles by <a href="http://stamen.com">Stamen Design</a>, <a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a> &mdash; Map data &copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>% 
  setView(-72.690940, 41.751426, zoom = 12) %>% 
  addCircles(~tow_lon, ~tow_lat, popup=tows$Make, weight = 3, radius=40, 
             color="#ffa500", stroke = TRUE, fillOpacity = 0.8) %>% 
  addLegend("bottomright", colors= "#ffa500", labels="Towed'", title="In Hartford")

Where are vehicles towed to?

leaflet(tows) %>% addTiles('http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png', attribution='Map tiles by <a href="http://stamen.com">Stamen Design</a>, <a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a> &mdash; Map data &copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>% 
  setView(-72.690940, 41.751426, zoom = 12) %>% 
  addCircles(~lon, ~lat, popup=tows$Make, weight = 3, radius=40, 
             color="#ffa500", stroke = TRUE, fillOpacity = 0.8) %>% 
  addLegend("bottomright", colors= "#ffa500", labels="Towed'", title="In Hartford")

Most common year?

years <- tows %>%
  group_by(Vehicle_Year) %>%
  summarise(count=n()) %>%
  arrange(-count)

kable(head(years,10))
Vehicle_Year count
2001 1506
2003 1444
2000 1418
2002 1402
2004 1206
2005 1151
1999 1129
2006 1048
2007 923
1998 914

Most common vehicle?

make <- tows %>%
  group_by(Make) %>%
  summarise(count=n()) %>%
  arrange(-count)

kable(head(make, 10))
Make count
HONDA 3353
NISSAN 2590
TOYOTA 1869
FORD 1495
ACURA 976
DODGE 786
CHEVY 772
HYUNDAI 595
MAZDA 426
JEEP 423

Most common model?

model <- tows %>%
  group_by(Model) %>%
  summarise(count=n()) %>%
  arrange(-count)

kable(head(model,10))
Model count
ACCORD 1826
MAXIMA 997
ALTIMA 848
CIVIC 805
CAMRY 729
COROLLA 505
TL 281
SENTRA 278
ELANTRA 239
SONATA 232

Any particular color?

color <- tows %>%
  group_by(Color) %>%
  summarise(count=n()) %>%
  arrange(-count)

kable(head(color,10))
Color count
GRAY 2614
BLACK 2208
RED 2011
WHITE 1931
BLUE 1863
GRY 1779
TAN 1585
GREEN 1196
BLK 1186
SILVER 905

Most common time of day towed?

tows$hour <- hour(tows$Time)

hour <- tows %>%
  group_by(hour) %>%
  summarise(count=n()) %>%
  arrange(-count)

kable(head(hour,10))
hour count
13 1444
12 1428
14 1256
15 1085
16 1032
17 1028
6 863
5 854
1 843
11 835
ggplot(tows, aes(x=hour)) + geom_histogram(binwidth=1)

Bad-luck drivers– who’s towed more often?

plates <- tows %>%
  group_by(Vehicle_Plate) %>%
  summarise(count=n()) %>%
  arrange(-count)

kable(head(plates,10))
Vehicle_Plate count
3455
NA 22
NONE 17
6AFSU6 11
4ARHP7 9
7AFST5 9
8APVS9 9
7AAEE1 8
894RZU 8
AA23938 8

Which tow yards are most prolific?

firms <- tows %>%
  group_by(Tow_Firm) %>%
  summarise(count=n()) %>%
  arrange(-count)

kable(head(firms,10))
Tow_Firm count
CROSS COUNTRY TOWING 3135
WHITEY’S FRAME SHOP 1765
CORONA’S AUTO PARTS,INC. 1673
FRIENDLY AUTO BODY & TOWING 1659
MODERN GARAGE 1512
A & N AUTO 1482
METRO AUTOBODY & TOWING INC 1450
BENTON AUTO BODY 1438
RENO’S AUTO BODY 1437
CAPITOL TOWING 1246

Most common address?

address <- tows %>%
  group_by(Tow_From_Address) %>%
  summarise(count=n()) %>%
  arrange(-count)

kable(head(address, 10))
Tow_From_Address count
1000 MAIN ST 180
200 BLOOMFIELD AV 157
100 WESTON ST 144
967 ASYLUM AV 140
105 SHERBROOKE AV 116
265 WASHINGTON ST 99
200 PEARL ST 97
253 HIGH ST 93
2995 MAIN ST 91
1429 PARK ST 80

Most common address and tow company?

tow_address <- tows %>%
  group_by(Tow_From_Address, Tow_Firm) %>%
  summarise(count=n()) %>%
  arrange(-count)

kable(head(tow_address, 20))
Tow_From_Address Tow_Firm count
200 BLOOMFIELD AV CROSS COUNTRY TOWING 78
105 SHERBROOKE AV CROSS COUNTRY TOWING 36
1000 MAIN ST CROSS COUNTRY TOWING 34
100 WESTON ST CORONA’S AUTO PARTS,INC. 30
57 SUMNER ST CROSS COUNTRY TOWING 29
100 WESTON ST INTERNATIONAL TOWING COMPANY 27
100 WESTON ST METRO AUTOBODY & TOWING INC 26
198 SIGOURNEY ST CROSS COUNTRY TOWING 22
887 ASYLUM AV CROSS COUNTRY TOWING 22
1000 MAIN ST CORONA’S AUTO PARTS,INC. 21
967 ASYLUM AV CORONA’S AUTO PARTS,INC. 21
28 TOWNLEY ST CROSS COUNTRY TOWING 20
1 WESTON PARK RD FRIENDLY AUTO BODY & TOWING 18
265 WASHINGTON ST CROSS COUNTRY TOWING 18
265 WASHINGTON ST MODERN GARAGE 18
200 PEARL ST CROSS COUNTRY TOWING 17
967 ASYLUM AV CAPITOL TOWING 17
100 WESTON ST WHITEY’S FRAME SHOP 16
1450 MAIN ST CROSS COUNTRY TOWING 16
31 GILLETT ST CROSS COUNTRY TOWING 16

Where do these tow yards target?

leaflet(tows) %>% addTiles('http://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}.png', attribution='Map tiles by <a href="http://stamen.com">Stamen Design</a>, <a href="http://creativecommons.org/licenses/by/3.0">CC BY 3.0</a> &mdash; Map data &copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a>') %>% 
  setView(-72.690940, 41.751426, zoom = 12) %>% 
  addCircles(~tow_lon, ~tow_lat, popup=tows$Tow_Firm, weight = 3, radius=40, 
             color=tows$t.color., stroke = TRUE, fillOpacity = 0.8) %>% 
  addLegend("bottomright", colors= "#ffa500", labels="Towed'", title="In Hartford")

What neighborhoods?

# Bring in the shape files for census tracts

require(rgdal)

# dsn is the folder the shape files are in. layer is the name of the file.
towntracts <- readOGR(dsn="maps", layer="census_tracts")
## OGR data source with driver: ESRI Shapefile 
## Source: "maps", layer: "census_tracts"
## with 833 features
## It has 14 fields
# creating a copy
towntracts_only <- towntracts

# turn the shapefile into a dataframe that can be worked on in R
require(maptools)
require(ggplot2)

towntracts <- fortify(towntracts, region="GEOID10")

# We only need the columns with the latitude and longitude
coords <- tows[c("tow_lon", "tow_lat")]

# Making sure we are working with rows that don't have any blanks
coords$tow_lon <- as.numeric(coords$tow_lon)
coords$tow_lat <- as.numeric(coords$tow_lat)
coords <- coords[complete.cases(coords),]

# Letting R know that these are specifically spatial coordinates
sp <- SpatialPoints(coords)

# Applying projections to the coordinates so they match up with the shapefile we're joining them with
# More projections information http://trac.osgeo.org/proj/wiki/GenParms 
proj4string(sp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
proj4string(sp)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
# Rendering the census tracts
plot(towntracts_only)

# Adding the coordinates of the traffic stops
plot(sp, col="red" , add=TRUE)

by_tract <- over(sp, towntracts_only)

by_tract <- by_tract %>%
  group_by(GEOID10) %>%
  summarise(total=n())

colnames(by_tract) <- c("id", "total")

by_tract$id <- as.character(by_tract$id)

# Bring in a dataframe that has matches census tract ID numbers to town names
tracts2towns <- read.csv("data/tracts_to_towns.csv", stringsAsFactors=FALSE)

# Changing the column names so it can be joined to the by_tract dataframe
colnames(tracts2towns) <- c("id", "town_name")

# Changing the GEOID number to character so it can be joined to the by_tract dataframe
tracts2towns$id <- as.character(tracts2towns$id)

# Adding a 0 to the front of the GEOID string because it was originally left out when it was imported
tracts2towns$id <- paste0("0", tracts2towns$id)## Distance between yards and tow locations?

# Eliminating leading and trailing white space just in case
tracts2towns$town_name <- str_trim(tracts2towns$town_name)

# Joining the by_tract dataframe to the tracts2towns dataframe

by_tract <- left_join(by_tract, tracts2towns)

total_map <- left_join(towntracts, by_tract)

total_map <- subset(total_map, !is.na(total))
townborders <- readOGR(dsn="maps", layer="ctgeo")
## OGR data source with driver: ESRI Shapefile 
## Source: "maps", layer: "ctgeo"
## with 169 features
## It has 6 fields
townborders_only <- townborders
townborders<- fortify(townborders, region="NAME10")

# Subset the town borders to just Hamden since that's the department we're looking at
town_borders <- subset(townborders, id=="Hartford")

tm_ct <- ggplot() +
  geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
  geom_polygon(data = town_borders, aes(x=long, y=lat, group=group, fill=total), color = "black", fill=NA, size=0.5) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Where vehicles are towed from", fill="")
print(tm_ct)

Compared to census data

# Loading my census key from an external script
source("keys.R")

# Replace census_key below with "your_own_key_whatever_it_is"
# Apply for one here http://api.census.gov/data/key_signup.html

race_tracts <- getCensus(name="acs5",
                         vintage=2014,
                         key=census_key,
                         vars=c("NAME", "B02001_001E", "B02001_002E"),
                         region="tract:*", regionin="state:09")

# What did we just do?



# I pulled the following population data for all census tracts in state 09, which is Connecticut

# B02001_001E - Total
# B02001_002E - White alone

# ok, let's clean this up
race_tracts$NAME <- NULL

# Creating a new column for the GEOID that can be joined with the dataframe we already have
race_tracts$id <- paste0(race_tracts$state, race_tracts$county, race_tracts$tract)

# Renaming the column names for clarity
colnames(race_tracts) <- c("state_code", "county_code", "tract_code", "total_pop", "white_pop", "id")

# Determining the minority population by subtracting the white population from the total
race_tracts$minority_pop <- race_tracts$total_pop - race_tracts$white_pop

# Now figuring out the percent makeup of each census tract
race_tracts$white_pop_p <- round(race_tracts$white_pop/race_tracts$total_pop*100,2)
race_tracts$minority_pop_p <- round(race_tracts$minority_pop/race_tracts$total_pop*100,2)

# Joining the two datframes
joined_tracts <- left_join(total_map, race_tracts)


# Mapping population

tm_ct <- ggplot() +
  geom_polygon(data = joined_tracts, aes(x=long, y=lat, group=group, fill=total_pop), color = "black", size=0.2) +
  geom_polygon(data = town_borders, aes(x=long, y=lat, group=group, fill=total), color = "black", fill=NA, size=0.5) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Hartford population", fill="")
print(tm_ct)

# Minority pop

tm_ct <- ggplot() +
  geom_polygon(data = joined_tracts, aes(x=long, y=lat, group=group, fill=minority_pop_p), color = "black", size=0.2) +
  geom_polygon(data = town_borders, aes(x=long, y=lat, group=group, fill=total), color = "black", fill=NA, size=0.5) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Hartford minority population", fill="")
print(tm_ct)

No correlation between population overall or minority population

How about restaurants?

# Downloading the data from the Hartford City Data Portal

restaurants2 <- read.csv("https://data.hartford.gov/api/views/cwxs-2pd8/rows.csv?accessType=DOWNLOAD")


restaurants2$lon <- gsub(".*, ", "", restaurants2$geom)
restaurants2$lon <- as.numeric(gsub("\\)", "", restaurants2$lon))


restaurants2$lat <- gsub(",.*", "", restaurants2$geom)
restaurants2$lat <- as.numeric(gsub("\\(", "", restaurants2$lat))

coords <- restaurants2[c("lon", "lat")]

coords <- coords[complete.cases(coords),]
sp <- SpatialPoints(coords)
proj4string(sp) <- "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
proj4string(sp)
## [1] "+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0"
by_tract <- over(sp, towntracts_only)

by_tract <- by_tract %>%
  group_by(GEOID10) %>%
  summarise(total=n())

colnames(by_tract) <- c("id", "total")

by_tract$id <- as.character(by_tract$id)

tracts2towns <- read.csv("data/tracts_to_towns.csv", stringsAsFactors=FALSE)

colnames(tracts2towns) <- c("id", "town_name")

tracts2towns$id <- as.character(tracts2towns$id)

tracts2towns$id <- paste0("0", tracts2towns$id)## Distance between yards and tow locations?

tracts2towns$town_name <- str_trim(tracts2towns$town_name)

by_tract <- left_join(by_tract, tracts2towns)

total_map <- left_join(towntracts, by_tract)


total_map <- subset(total_map, !is.na(total))

tm_ct <- ggplot() +
  geom_polygon(data = total_map, aes(x=long, y=lat, group=group, fill=total), color = "black", size=0.2) +
  coord_map() +
  scale_fill_distiller(type="seq", trans="reverse", palette = "Reds", breaks=pretty_breaks(n=10)) +
  theme_nothing(legend=TRUE) +
  labs(title="Where restaurants are", fill="")
print(tm_ct)

What’s the average distance for tows per towing company?

tows$lat <- as.numeric(tows$lat)
tows$lon <- as.numeric(tows$lon)
tows$tow_lat <- as.numeric(tows$tow_lat)
tows$tow_lon <- as.numeric(tows$tow_lon)

# tows$lonlat <- paste0(tows$lat, ", ", tows$lon)
# tows$t_lonlat <- paste0(tows$tow_lat, ", ", tows$tow_lon)

tows_no_na <- subset(tows, !is.na(tow_lat))

tows_no_na$distance <- 0

for (i in 1:nrow(tows_no_na)) {
  tows_no_na$distance[i] <- distm(c(tows_no_na$lat[i], tows_no_na$lon[i]), c(tows_no_na$tow_lat[i], tows_no_na$tow_lon[i]), fun=distHaversine)
}

tows_no_na$miles <- tows_no_na$distance * 0.00062137

## average miles per firm

avg_miles <- tows_no_na %>%
  group_by(Tow_Firm) %>%
  summarise(Avg_Meters=mean(distance)) %>%
  mutate(Avg_Miles=Avg_Meters*0.00062137) %>%
  arrange(-Avg_Miles)

kable(avg_miles)
Tow_Firm Avg_Meters Avg_Miles
AUTO LOCK 18927.073 11.7607153
CARON TOWING 8822.103 5.4817904
A & M TOWING 7737.571 4.8078945
D & L TOWING 6490.325 4.0328934
INTERNATIONAL TOWING COMPANY 5378.759 3.3421993
CENTRAL AUTO & TRANSPORT, LLC 4051.391 2.5174128
WHITEY’S FRAME SHOP 3660.922 2.2747873
ELMWOOD AUTOMOTIVE 3335.539 2.0726036
CROSS COUNTRY TOWING 2787.113 1.7318286
MEDINA AUTO BODY 2783.181 1.7293855
MODERN GARAGE 2690.880 1.6720319
CAPITOL AUTOMOTIVE LLC 2496.788 1.5514291
METRO AUTOBODY & TOWING INC 2432.902 1.5117323
CORONA’S AUTO PARTS,INC. 2413.964 1.4999646
FRIENDLY AUTO BODY & TOWING 2390.113 1.4851446
FOUR ACES 2343.062 1.4559085
RENO’S AUTO BODY 2112.391 1.3125762
A & N AUTO 2006.977 1.2470755
CENTRAL GARAGE 1922.735 1.1947299
BENTON AUTO BODY 1723.905 1.0711828
CAPITOL TOWING 1444.760 0.8977306

Heatmap

joined_tracts2 <- subset(joined_tracts, town_name=="Hartford")


hartbox <- make_bbox(lon = tows_no_na$tow_lon, lat =tows_no_na$tow_lat, f = .1)
hart_map <- get_map(location = hartbox, maptype = "roadmap", source = "google")

pm_ct <- ggmap(hart_map) 
pm_ct <- pm_ct + stat_density2d(data = tows_no_na, show.legend=F, aes(x=tow_lon, y=tow_lat, fill=..level.., alpha=..level..),  geom="polygon",size=.5,bins=10)
pm_ct <- pm_ct + scale_fill_gradient(low="purple", high="firebrick1", name="Distribution")
pm_ct <- pm_ct + coord_fixed()
pm_ct <- pm_ct + theme_nothing(legend=TRUE) 
pm_ct <- pm_ct + labs(x=NULL, y=NULL, title="Where tow yards target")
pm_ct <- pm_ct + theme(plot.title=element_text(face="bold", hjust=.4))
pm_ct <- pm_ct + theme(plot.subtitle=element_text(face="italic", size=9, margin=margin(l=20)))
pm_ct <- pm_ct + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e", hjust=0))

print(pm_ct)

Heatmap per towing company

pm_ct <- ggmap(hart_map)
pm_ct <- pm_ct + stat_density2d(data = tows_no_na, show.legend=F, aes(x=tow_lon, y=tow_lat, fill=..level.., alpha=..level..),  geom="polygon",size=.5,bins=10)
pm_ct <- pm_ct + geom_polygon(data = joined_tracts2, aes(x=long, y=lat, group=group), fill=NA, color = "black", size=0.2) 
pm_ct <- pm_ct + geom_polygon(data = town_borders, aes(x=long, y=lat, group=group), fill=NA, color = "black", size=0.4)
#pm_ct <- pm_ct + geom_polygon(data = ct_only, aes(x=long, y=lat, group=group), fill="seagreen2", color = "gray93", size=0.2)
#pm_ct <- pm_ct + gg_circle(r=9, xc=-73, yc=42, color="white", fill=NA, alpha=0.2, size=40) 
pm_ct <- pm_ct + scale_fill_gradient(low="deepskyblue2", high="firebrick1", name="Distribution")
#pm_ct <- pm_ct + scale_fill_discrete()
#extra_lat <- c(46.358685, 35.872715)
#extra_lon <- c(-64.209938, -79.735653)
#pm_ct <- pm_ct + theme(legend.position="top",  legend.key = element_blank())
pm_ct <- pm_ct + coord_fixed()
pm_ct <- pm_ct + theme_nothing(legend=TRUE) 
pm_ct <- pm_ct + labs(x=NULL, y=NULL, title="Where tow yards target")
pm_ct <- pm_ct + facet_wrap(~Tow_Firm)
#pm_ct <- pm_ct + theme(text = element_text(size=15), panel.background = element_rect(fill = 'gray93', color=NA))
pm_ct <- pm_ct + theme(plot.title=element_text(face="bold", hjust=.4))
pm_ct <- pm_ct + theme(plot.subtitle=element_text(face="italic", size=9, margin=margin(l=20)))
pm_ct <- pm_ct + theme(plot.caption=element_text(size=12, margin=margin(t=12), color="#7a7d7e", hjust=0))
#pm_ct <- pm_ct + theme(legend.key.size = unit(1, "cm"))

print(pm_ct)